package com.example.demo.simple;

import java.util.HashSet;
import java.util.Set;

public class SimpleTokenizer {
    /**
     * Splits text into a set of distinct tokens.
     *
     * <p>Each Chinese (Han script) ideograph becomes its own token. Any other
     * run of non-whitespace characters (for example an English word) becomes a
     * single token, delimited by whitespace or by adjacent Han ideographs.
     * Whitespace itself is never emitted as a token.
     *
     * <p>The text is processed by Unicode code point, so ideographs outside the
     * Basic Multilingual Plane are kept intact rather than being split into
     * surrogate halves.
     *
     * @param text the text to tokenize; may be {@code null}
     * @return a new mutable set of distinct tokens; empty when {@code text} is
     *         {@code null}, empty, or contains only whitespace
     */
    public static Set<String> tokenize(String text) {
        Set<String> tokens = new HashSet<>();
        if (text == null || text.isEmpty()) {
            return tokens;
        }
        // Accumulates the current run of non-Han, non-whitespace characters.
        StringBuilder word = new StringBuilder();
        int i = 0;
        while (i < text.length()) {
            int cp = text.codePointAt(i);
            // Advance by 1 or 2 chars so surrogate pairs are consumed as a unit.
            i += Character.charCount(cp);
            if (Character.isWhitespace(cp) || Character.isSpaceChar(cp)) {
                // Separator: ends the current word (covers full-width and no-break spaces too).
                flushWord(word, tokens);
            } else if (Character.UnicodeScript.of(cp) == Character.UnicodeScript.HAN) {
                // A Han ideograph ends any pending word and is a token by itself.
                flushWord(word, tokens);
                tokens.add(new String(Character.toChars(cp)));
            } else {
                word.appendCodePoint(cp);
            }
        }
        flushWord(word, tokens);
        return tokens;
    }

    /** Adds the pending word (if any) to the token set and resets the buffer. */
    private static void flushWord(StringBuilder word, Set<String> tokens) {
        if (word.length() > 0) {
            tokens.add(word.toString());
            word.setLength(0);
        }
    }
}
